##########################
### Matching  ############
##########################
# covariates and dataset depend on the comparison
# questions to bowydenbraber@gmail.com

#date
library(MatchIt)
library(Matching)
library(rgenoud)

# Settings container. covmatchitindicators holds the matching covariates
# (depends on comparison).
q <- list(covmatchitindicators = c("covars"))


#load packages
library(MatchIt)
library(Matching)
library(rgenoud)
library(cobalt)


# Load the prepared dataset. Its column `tr` is the treatment indicator used
# in the matching formulas further down.
cu.in <- read.csv("CU00_PREPARED_FOR_MATCHING.csv")

# Add two helper columns that are 1 for every row.
cu.in[["constant"]] <- 1
cu.in[["constant2"]] <- 1

# Candidate variables for the search below. "constant2" has to stay first
# (presumably so the first trial acts as a baseline in which no real
# covariate is set aside -- TODO confirm).
vars_to_test <- c("constant2", "etc.")

# Search state.
k <- 1        # loop flag: the search runs while k is below 100
vtot <- 100   # lowest batch score accepted so far; starts high
v <- NULL     # scores of the candidates tested in the current batch
counter <- 1  # number of leading elements of `intermediate` that are kept

# `intermediate` collects the variables that improve balance; `added_vars`
# holds the same variables without the leading "constant".
added_vars <- NULL
intermediate <- "constant"

# Greedy search for the variables to leave out of the propensity model.
#
# Each pass of the while loop is one "batch": every remaining candidate in
# vars_to_test is, in turn, removed from the matching covariates (and passed
# to summary() as an additional balance variable), full matching is run, and
# the candidate is scored by the mean absolute value of column 3 of
# sum.matched over the rows where that value exceeds 0.25. The candidate
# with the lowest score is accepted when it beats the best score so far
# (vtot); otherwise the search ends.
#
# The search also ends
#   * as soon as a trial leaves no row above 0.25 ("Balance reached!");
#     that trial's candidate is kept, or
#   * when no candidates are left to test.
#
# On exit, `intermediate` is c("constant", added_vars).
while (k < 100 && length(vars_to_test) > 0) {
  print("going")
  # One score per candidate of this batch.
  v <- rep(NA_real_, length(vars_to_test))

  for (i in seq_along(vars_to_test)) {
    print("start")
    print(paste0("vars_to_tests", vars_to_test))
    print(paste0("specific var to test this round:", vars_to_test[i]))

    # Trial set: everything accepted so far plus the current candidate.
    intermediate <- c("constant", added_vars, vars_to_test[i])
    print(paste0("intermediate: ", intermediate))

    # Match on every covariate that is not in the trial set.
    q$covmatchitindicators2 <-
      q$covmatchitindicators[!(q$covmatchitindicators %in% intermediate)]
    if (length(q$covmatchitindicators2) == 0) {
      # "tr~" with an empty right-hand side cannot be parsed as a formula.
      stop(
        "no matching covariates left after excluding: ",
        paste(intermediate, collapse = ", "),
        call. = FALSE
      )
    }

    matching_output <- matchit(
      as.formula(paste("tr~", paste(q$covmatchitindicators2, collapse = "+"))),
      data = cu.in,
      method = "full",
      discard = "control",
      # caliper = , # add caliper for robustness check
      exact = c("mun.state.num")
    )

    summary_matched <- summary(
      matching_output,
      standardize = TRUE,
      addlvariables = intermediate,
      data = cu.in
    )

    # NOTE(review): column 3 is taken to be the standardized mean difference
    # -- confirm against the installed MatchIt version.
    smd <- summary_matched$sum.matched[, 3]
    # which() drops NA entries, so a row without a computable difference is
    # neither counted as unbalanced nor mistaken for "balance reached".
    unbalanced <- which(abs(smd) > 0.25)
    nobal_names <- as.data.frame(smd[unbalanced])

    print("vars without balance")
    print(nobal_names)

    balanced <- length(unbalanced) == 0
    # NaN when nothing is unbalanced.
    finalmean <- mean(abs(smd[unbalanced]))

    if (balanced) {
      k <- 102
      print("Balance reached!")
    } else {
      print("no balance yet")
    }

    print("matched number")
    print(summary_matched$nn[4, 2])
    print("unmatched number")
    print(summary_matched$nn[5, 2])

    print("mean st diff of unbalanced vars")
    print(finalmean)

    if (balanced) {
      # Keep the candidate that produced balance and stop searching; going
      # on would only put NaN scores into the batch comparison.
      added_vars <- c(added_vars, vars_to_test[i])
      counter <- counter + 1
      vars_to_test <- vars_to_test[-i]
      break
    }

    v[i] <- finalmean
    if (i < length(vars_to_test)) {
      print("not end of this batch yet")
    }
  }

  if (k < 100) {
    # The whole batch was scored without reaching balance.
    value_batch <- min(v)
    print(paste0("end of batch - mean st diff of unbalanced vars: ", value_batch))
    lowest <- which.min(v)
    print(paste0("lowest of batch: ", vars_to_test[lowest]))
    if (value_batch < vtot) {
      # Best candidate of the batch improves on the best score so far.
      counter <- counter + 1
      vtot <- value_batch
      x <- vars_to_test[lowest]
      print(paste0("Add to intermediate: ", x))
      added_vars <- c(added_vars, x)
      vars_to_test <- vars_to_test[-lowest]
    } else {
      # No candidate improves the score: stop.
      k <- 101
      print(vtot)
      print("the end")
    }
  }

  # Drop the trial candidate again: only accepted variables stay.
  intermediate <- c("constant", added_vars)
  v <- NULL
}

if (k < 100) {
  # The loop ended because vars_to_test is empty, not through k.
  print("no candidate variables left to test")
}

# Candidates that were never accepted are still in vars_to_test.
# vars_to_test

# If balance is reached the search prints "Balance reached!".
# Otherwise the variables printed below are the ones that resulted in the
# lowest mean difference of the unbalanced covariates. "constant" is also
# displayed, but can be ignored.
# If the best match only uses the constants, adding variables is pointless.


finalresult <- intermediate
print("These variables result in the lowest mean difference of unbalanced vars:")
# Explicit print(): a bare expression is not auto-printed when the script is
# run through source() without echo.
print(finalresult)

# Split the covariates according to finalresult, the same way the search
# loop does:
#   additionalcovariates    - variables in finalresult; left out of the
#                             matching formula and only checked for balance
#   finalmatchingcovariates - all remaining entries of q$covmatchitindicators
# Define either vector before this point to override the derived default.
if (!exists("additionalcovariates")) {
  additionalcovariates <- finalresult
}
if (!exists("finalmatchingcovariates")) {
  finalmatchingcovariates <-
    q$covmatchitindicators[!(q$covmatchitindicators %in% additionalcovariates)]
}
if (length(finalmatchingcovariates) == 0) {
  # "tr~" with an empty right-hand side cannot be parsed as a formula.
  stop("finalmatchingcovariates is empty: nothing left to match on",
       call. = FALSE)
}
print(finalmatchingcovariates)
print(additionalcovariates)

# matching code
matching_output <- matchit(
  as.formula(paste("tr~", paste(finalmatchingcovariates, collapse = "+"))),
  data = cu.in,
  method = "full",
  # caliper = , # add caliper for robustness check
  exact = c("mun.state.num"),
  discard = "control"
)

# Explicit print(): a bare summary() call shows nothing when the script is
# run through source() without echo.
print(summary(
  matching_output,
  standardize = TRUE,
  addlvariables = additionalcovariates,
  data = cu.in
))


# Extract the matched dataset and save it.
full.data <- match.data(matching_output)
write.csv(full.data, "MATCHING_OUTPUT.csv")
